1   //==============================================================================
2   // file :       XMLDocumentHandlerDOM.java
3   // project:     Lucene Search System
4   //
5   // last change: date:       $Date: 2003/09/09 03:11:52 $
6   //              by:         $Author: bitiboy $
7   //              revision:   $Revision: 1.1 $
8   //------------------------------------------------------------------------------
9   // copyright:   GNU GPL Software License (see class documentation)
10  //==============================================================================
11  
12  package com.justhis.lucene.xml;
13  
14  import org.apache.lucene.document.Field;
15  
16  /*
17   * $Id: XMLDocumentHandlerDOM.java,v 1.1 2003/09/09 03:11:52 bitiboy Exp $
18   *
19   * Copyright 2003 Acai Software All Rights Reserved.
20   *
21   * This file XMLDocumentHandlerDOM.java is part of the Lucene Search System.
22  
23   * The Lucene Search System is free software; you can redistribute it and/or modify
24   * it under the terms of the GNU General Public License as published by
25   * the Free Software Foundation; either version 2 of the License, or
26   * (at your option) any later version.
27  
28   * Lucene Search System is distributed in the hope that it will be useful,
29   * but WITHOUT ANY WARRANTY; without even the implied warranty of
30   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31   * GNU General Public License for more details.
32  
33   * You should have received a copy of the GNU General Public License
34   * along with the Lucene Search System; if not, write to the Free Software
35   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
36  
37   * http://www.justhis.com http://ejb.cn
38   * CONTACT: email = webmaster@justhis.com superaxis@sohu.com
39   */
40  import org.w3c.dom.*;
41  import org.w3c.dom.Node;
42  
43  import java.io.File;
44  
45  import javax.xml.parsers.*;
46  
47  
48  /***
49   * ????DOM????XML??????????????  ??????XML????????????????Jdom????????????
50   *
51   * @author <a href="http://blog.ejb.cn">acai</a>
52   * @version $Revision: 1.1 $
53   */
54  public class XMLDocumentHandlerDOM {
55      //~ Methods ----------------------------------------------------------------
56  
57      /***
58       * ????????lucene??????  ????????????lucene????????
59       *
60       * @param f
61       *
62       * @return TODO
63       */
64      public org.apache.lucene.document.Document createXMLDocument(File f) {
65          org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
66          DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
67  
68          try {
69              DocumentBuilder df = dbf.newDocumentBuilder();
70              org.w3c.dom.Document d = df.parse(f);
71              Node root = d.getDocumentElement();
72              traverseTree(root, document);
73          } catch (Exception e) {
74              System.out.println("error: " + e);
75              e.printStackTrace();
76          }
77  
78          return document;
79      }
80  
81      /***
82       * ??XML????????????????????????lucenen????????
83       *
84       * @param node
85       * @param document
86       */
87      private static void traverseTree(Node node,
88                                       org.apache.lucene.document.Document document
89                                      ) {
90          NodeList nl = node.getChildNodes();
91  
92          if (nl.getLength() == 0) {
93              if (node.getNodeType() == Node.TEXT_NODE) {
94                  Node parentNode = node.getParentNode();
95  
96                  if (parentNode.getNodeType() == Node.ELEMENT_NODE) {
97                      String parentNodeName = parentNode.getNodeName();
98  
99                      // 		    String nodeValue = node.getNodeValue();
100                     // 		    if (parentNodeName.equals("name"))
101                     // 		    {
102                     Node siblingNode = node.getNextSibling();
103 
104                     if (siblingNode != null) {
105                         if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE
106                            ) {
107                             document.add(Field.Text("name",
108                                                     siblingNode.getNodeValue()
109                                                    )
110                                         );
111                         }
112                     }
113 
114                     // 		    }
115                     // 		    else if (parentNodeName.equals("profession"))
116                     // 		    {
117                     // 			Node siblingNode = node.getNextSibling();
118                     // 			if (siblingNode != null)
119                     //                         {
120                     // 			    if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
121                     //                             {
122                     // 				document.add(Field.Text([arentNodeName, siblingNode.getNodeValue()));
123                     // 			    }
124                     // 			}
125                     // 		    }
126                     // 		    else if (parentNodeName == "addressLine1")
127                     //                     {
128                     // 			Node siblingNode = node.getNextSibling();
129                     // 			if(siblingNode != null)
130                     // 			{
131                     // 			    if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
132                     // 		            {
133                     // 				document.add(Field.Text("addressLine1", siblingNode.getNodeValue()));
134                     // 			    }
135                     // 			}
136                     // 		    }
137                     // 		    else if (parentNodeName.equals("addressLine2"))
138                     // 		    {
139                     // 			Node siblingNode = node.getNextSibling();
140                     // 			if (siblingNode != null)
141                     // 			{
142                     // 			    if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
143                     // 			    {
144                     // 				document.add(Field.Text("addressLine2", siblingNode.getNodeValue()));
145                     // 			    }
146                     // 			}
147                     // 		    }
148                     // 		    if (parentNodeName.equals("city"))
149                     // 		    {
150                     // 			Node siblingNode = node.getNextSibling();
151                     // 			if (siblingNode != null)
152                     //                         {
153                     // 			    if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
154                     // 			    {
155                     // 				document.add(Field.Text("city", siblingNode.getNodeValue()));
156                     // 			    }
157                     // 			}
158                     // 		    }
159                     // 		    else if (parentNodeName.equals("zip"))
160                     // 		    {
161                     // 			Node siblingNode = node.getNextSibling();
162                     // 			if (siblingNode != null)
163                     // 			{
164                     // 			    if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
165                     // 			    {
166                     // 				document.add(Field.Text("zip", siblingNode.getNodeValue()));
167                     // 			    }
168                     // 			}
169                     // 		    }
170                     // 		    else if (parentNodeName.equals("state"))
171                     // 		    {
172                     // 			Node siblingNode = node.getNextSibling();
173                     // 			if (siblingNode != null)
174                     // 			{
175                     // 			    if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
176                     // 			    {
177                     // 				document.add(Field.Text("state", siblingNode.getNodeValue()));
178                     // 			    }
179                     // 			}
180                     // 		    }
181                     // 		    else if (parentNodeName.equals("country"))
182                     // 		    {
183                     // 			Node siblingNode = node.getNextSibling();
184                     // 			if (siblingNode != null)
185                     // 			{
186                     // 			    if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
187                     // 			    {
188                     // 				document.add(Field.Text("country", siblingNode.getNodeValue()));
189                     // 			    }
190                     // 			}
191                     // 		    }
192                 }
193             }
194         } else {
195             for (int i = 0; i < nl.getLength(); i++) {
196                 traverseTree(nl.item(i), document);
197             }
198         }
199     }
200 }
201 /*
202  * $Log: XMLDocumentHandlerDOM.java,v $
203  * Revision 1.1  2003/09/09 03:11:52  bitiboy
204  * *** empty log message ***
205  *
206  * Revision 1.1  2003/09/09 00:54:45  bitiboy
207  * *** empty log message ***
208  *
209  * Revision 1.1  2003/09/07 08:23:50  superaxis
210  * *** empty log message ***
211  *
212  *
213 */
This page was automatically generated by Maven